#### installing packages if not installed ####

list.of.packages <- c('tidyverse',
                      'meta',
                      'ggplot2',
                      'lubridate',
                      'estimatr',
                      'ggthemes',
                      'readr',
                      'readxl',
                      'stringr',
                      'rdrobust',
                      'gridExtra')
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)

#### attaching libraries ####

suppressPackageStartupMessages(
  
  {
    
    library(tidyverse)
    library(meta)
    library(ggplot2)
    library(lubridate)
    library(estimatr)    
    library(ggthemes)
    library(readr)
    library(readxl)
    library(stringr)
    library(rdrobust)
    library(gridExtra)
    
  }
  
)

#### list of covariates #### 

# temperature (DONE)
# wind speed (DONE)
# precipitation (DONE)
# 311 calls (DONE)
# COVID deaths per day 

#### loading in 311 call data #### 

# austin 

aus311 = 
  read_csv("data/balcovs/calls311/austin_311/Austin_311_Public_Data_20240722.csv")

aus311$date = 
  (substring(aus311$`Created Date`, 1, 10) %>% 
  str_split("/") %>% 
  do.call(rbind.data.frame, .) %>% 
  `colnames<-` (c("month", 'day', 'year')) %>% 
  mutate(date = as.Date(paste(year, month, day, sep = '-'))))$date
aus311$City = tolower(aus311$City)
aus311 = aus311 %>% filter(grepl(x = City, "austin"))

aus311_ts = 
  aus311 %>% 
  mutate(count = 1) %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE)) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-29"), 1, 0)) %>% 
  mutate(blmrv = date - as.Date("2020-05-29"))

# los angeles

list_of_files = list.files('data/balcovs/calls311/la_311')
la_list_311 = as.list(rep(NA, length(list_of_files)))

for (i in 1:length(la_list_311)) {
  
  print(paste0("I iteration ", i))
  
  la_list_311[[i]] = 
    read_csv(paste0('data/balcovs/calls311/la_311/', list_of_files[i]))
  
  la_list_311[[i]]$date = 
    as.Date((substring(la_list_311[[i]]$CreatedDate, 1, 10) %>% 
               str_split("/") %>% 
               do.call(rbind.data.frame, .) %>% 
               `colnames<-` (c("month", "day", 'year')) %>% 
               mutate(date = as.Date(paste(year, month,
                                           day, sep = '-'))))$date)
  
  la_list_311[[i]] = 
    la_list_311[[i]] %>% 
    mutate(count = 1) %>% 
    group_by(date) %>% 
    summarize(count = sum(count, na.rm = TRUE)) %>% 
    mutate(blm = ifelse(date >= as.Date("2020-05-28"), 1, 0)) %>% 
    mutate(blmrv = date - as.Date("2020-05-28"))
  
}

la_df_311 = 
  la_list_311 %>% 
  do.call(rbind.data.frame, .)

# philadelphia 

list_of_files = list.files('data/balcovs/calls311/phl_311')
phl_list_311 = as.list(rep(NA, length(list_of_files)))

for (i in 1:length(phl_list_311)) {
  
  print(paste0("I iteration ", i))
  
  phl_list_311[[i]] = 
    read_csv(paste0('data/balcovs/calls311/phl_311/', list_of_files[i]))
  
  phl_list_311[[i]]$date = 
    as.Date(substring(phl_list_311[[i]]$requested_datetime, 1, 10))
  
  phl_list_311[[i]] = 
    phl_list_311[[i]] %>% 
    filter(service_name != "Information Request") %>% 
    mutate(count = 1) %>% 
    group_by(date) %>% 
    summarize(count = sum(count, na.rm = TRUE)) %>% 
    mutate(blm = ifelse(date >= as.Date("2020-05-29"), 1, 0)) %>% 
    mutate(blmrv = date - as.Date("2020-05-29"))
  
}

phl_df_311 = 
  phl_list_311 %>% 
  do.call(rbind.data.frame, .) %>% 
  mutate(date = as.Date(date)) %>% 
  filter(date >= as.Date("2015-03-01")) %>% 
  filter(date <= as.Date('2022-03-01'))

# seattle

sea_311 = 
  read_csv('data/balcovs/calls311/sea_311/CSR_Public_Requests_20240806.csv')

sea_311$date = 
  as.Date((sea_311$`Submitted Date` %>% 
  str_split('/') %>% 
  do.call(rbind.data.frame, .) %>% 
  `colnames<-` (c("month", 'day', 'year')) %>% 
  mutate(date = paste(year, month, day, sep = '-')))$date)

sea_311 = 
  sea_311 %>% 
  mutate(count = 1) %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE)) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-29"), 1, 0)) %>% 
  mutate(blmrv = date - as.Date("2020-05-29")) %>% 
  filter(date >= as.Date("2015-03-01")) %>% 
  filter(date <= as.Date("2022-02-01"))

#### loading in weather data #### 

# austin

aus_temp = read_csv("data/balcovs/weather/austin/3766591.csv")

aus_temp$date = as.Date(aus_temp$DATE)

aus_temp = 
  aus_temp %>% 
  group_by(date) %>% 
  summarize(TMAX = mean(TMAX, na.rm = TRUE),
            TMIN = mean(TMIN, na.rm = TRUE),
            PRCP = mean(PRCP, na.rm = TRUE),
            AWND = mean(AWND, na.rm = TRUE)) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-29"), 1, 0)) %>% 
  mutate(blmrv = date - as.Date("2020-05-29"))

# los angeles

la_temp = read_csv("data/balcovs/weather/los_angeles/3766593.csv")

la_temp$date = as.Date(la_temp$DATE)

la_temp = 
  la_temp %>% 
  group_by(date) %>% 
  summarize(TMAX = mean(TMAX, na.rm = TRUE),
            TAVG = mean(TAVG, na.rm = TRUE),
            TMIN = mean(TMIN, na.rm = TRUE),
            PRCP = mean(PRCP, na.rm = TRUE),
            AWND = mean(AWND, na.rm = TRUE)) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-28"), 1, 0)) %>% 
  mutate(blmrv = date - as.Date("2020-05-28")) 

# philadelphia

phl_temp1 = read_csv("data/balcovs/weather/philadelphia/3766596.csv")
phl_temp2 = read_csv("data/balcovs/weather/philadelphia/3770942.csv")

phl_temp1$date = as.Date(phl_temp1$DATE)
phl_temp2$date = as.Date(phl_temp2$DATE)

phl_temp1 = 
  phl_temp1 %>% 
  group_by(date) %>% 
  summarize(TMAX = mean(TMAX, na.rm = TRUE),
            TMIN = mean(TMIN, na.rm = TRUE),
            PRCP = mean(PRCP, na.rm = TRUE),
            AWND = mean(AWND, na.rm = TRUE)) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-29"), 1, 0)) %>% 
  mutate(blmrv = date - as.Date("2020-05-29"))

phl_temp2 = 
  phl_temp2 %>% 
  group_by(date) %>% 
  summarize(TMAX = mean(TMAX, na.rm = TRUE),
            TMIN = mean(TMIN, na.rm = TRUE),
            PRCP = mean(PRCP, na.rm = TRUE),
            AWND = mean(AWND, na.rm = TRUE)) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-29"), 1, 0)) %>% 
  mutate(blmrv = date - as.Date("2020-05-29"))

phl_temp1 = 
  phl_temp1 %>% filter(date >= as.Date("2019-04-30"))

phl_temp = bind_rows(phl_temp2, phl_temp1)

# seattle

sea_temp = read_csv("data/balcovs/weather/seattle/3766578.csv")

sea_temp$date = as.Date(sea_temp$DATE)

sea_temp = 
  sea_temp %>% 
  group_by(date) %>% 
  summarize(TMAX = mean(TMAX, na.rm = TRUE),
            TMIN = mean(TMIN, na.rm = TRUE),
            PRCP = mean(PRCP, na.rm = TRUE),
            AWND = mean(AWND, na.rm = TRUE)) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-29"), 1, 0)) %>% 
  mutate(blmrv = date - as.Date("2020-05-29"))

#### loading in COVID death data #### 

# reading in austin data 

aus_covid = 
  read_excel("data/balcovs/covid/austin/Texas COVID-19 New Confirmed Cases by County.xlsx",
             sheet = 1) %>% 
  slice(2:(nrow(.)-4))

colnames(aus_covid) = as.character(aus_covid[1, ])
# as.Date('2020-03-06')
aus_covid = 
  aus_covid %>% 
  filter(County == 'Travis') %>% 
  t %>% 
  data.frame()

aus_covid = aus_covid[2:(nrow(aus_covid)-2), ]
aus_covid = 
  data.frame(
  cases = aus_covid,
  date = seq.Date(from = as.Date("2020-03-06"), to = as.Date("2020-03-06")+300, by = 'day')
) 

aus_covid2 = 
  read_excel("data/balcovs/covid/austin/Texas COVID-19 New Confirmed Cases by County.xlsx",
             sheet = 2) %>% 
  slice(2:(nrow(.)-4))

colnames(aus_covid2) = as.character(aus_covid2[1, ])
# as.Date('2020-03-06')
aus_covid2 = 
  aus_covid2 %>% 
  filter(County == 'Travis') %>% 
  t %>% 
  data.frame()
aus_covid2 = aus_covid2[2:(nrow(aus_covid2)-2), ]

aus_covid2 = 
  data.frame(
    cases = aus_covid2,
    date = seq.Date(from = as.Date("2021-01-01"), to = as.Date("2021-12-31"), by = 'day')
  ) 

aus_covid = 
  bind_rows(aus_covid, aus_covid2) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-29"), 1, 0)) %>% 
  mutate(blmrv = date - as.Date("2020-05-29"))

aus_covid$cases = as.numeric(aus_covid$cases)

# reading in los angeles data 

la_covid = 
  read_csv("data/balcovs/covid/los_angeles/LA_County_Covid19_cases_deaths_date_table.csv") 

la_covid$date = as.Date(la_covid$date_use)

la_covid = 
  la_covid %>% 
  dplyr::select(date, new_case) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-28"), 1, 0)) %>% 
  mutate(blmrv = date - as.Date("2020-05-28"))

# reading in philadelphia data 

phl_covid = read_csv("data/balcovs/covid/philadelphia/covid_cases_by_date.csv")
phl_covid$date = as.Date(substring(phl_covid$collection_date, 1, 10))

phl_covid = 
  phl_covid %>% 
  filter(date <= as.Date("2024-01-01")) %>% 
  filter(test_result == "positive") %>% 
  dplyr::select(date, count) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-28"), 1, 0)) %>% 
  mutate(blmrv = date - as.Date("2020-05-28"))

# reading in seattle data

sea_covid = 
  read_excel("data/balcovs/covid/seattle_king_cty/COVID19_King-County.xlsx",
           sheet = 6)
sea_covid$date = as.Date(sea_covid$date)
sea_covid = 
  sea_covid %>% dplyr::select(date, case_count) %>% 
  filter(date <= as.Date("2023-01-01")) %>% 
  dplyr::select(date, case_count) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-29"), 1, 0)) %>% 
  mutate(blmrv = date - as.Date("2020-05-29"))

#### generating balance tests for respective cities ####

# austin

balout1 = with(aus311_ts, rdrobust(x = blmrv, y = scale(count), p = 1, kernel = 'uni'))
balout2 = with(aus_temp, rdrobust(x = blmrv, y = scale(TMAX), p = 1, kernel = 'uni'))
balout3 = with(aus_temp, rdrobust(x = blmrv, y = scale(TMIN), p = 1, kernel = 'uni'))
balout4 = with(aus_temp, rdrobust(x = blmrv, y = scale(PRCP), p = 1, kernel = 'uni'))
balout5 = with(aus_temp, rdrobust(x = blmrv, y = scale(AWND), p = 1, kernel = 'uni'))
balout6 = with(aus_covid, rdrobust(x = blmrv, y = scale(cases), p = 1, kernel = 'uni'))

df_bal_aus = data.frame(
  est = c(balout1$coef[1], balout2$coef[1], balout3$coef[1],
          balout4$coef[1], balout5$coef[1], balout6$coef[1]),
  se = c(balout1$se[3], balout2$se[3], balout3$se[3],
         balout4$se[3], balout5$se[3], balout6$se[3]),
  pv = c(balout1$pv[3], balout2$pv[3], balout3$pv[3],
         balout4$pv[3], balout5$pv[3], balout6$pv[3]),
  outcome = factor(c("311 Calls", "Temp. (Max.)", "Temp. (Min.)",
                     "Precip.", "Wind", "COVID\nCases"),
                   levels = rev(c("Temp. (Max.)", "Temp. (Min.)", "Precip.",
                              "Wind", "311 Calls", "COVID\nCases")))
)

bplot1 = df_bal_aus %>% 
  ggplot() + 
  geom_hline(yintercept = 0) + 
  geom_point(aes(x = outcome, y = est)) + 
  geom_errorbar(aes(x = outcome, 
                    ymin = est - 1.96 * se,
                    ymax = est + 1.96 * se),
                width = 0,
                size = .6) + 
  coord_flip() + 
  ylim(-2, 2) + 
  labs(y = "RDiT Coefficient (BLM Protest)",
       x = "Balance Covariate",
       title = "A. Balance Test (Austin)") + 
  theme_tufte() + 
  theme(axis.text.y = element_text(hjust = 0.5))
  
# los angeles

balout1 = with(la_df_311, rdrobust(x = blmrv, y = scale(count), p = 1, kernel = 'uni'))
balout2 = with(la_temp, rdrobust(x = blmrv, y = scale(TMAX), p = 1, kernel = 'uni'))
balout3 = with(la_temp, rdrobust(x = blmrv, y = scale(TMIN), p = 1, kernel = 'uni'))
balout4 = with(la_temp, rdrobust(x = blmrv, y = scale(PRCP), p = 1, kernel = 'uni'))
balout5 = with(la_temp, rdrobust(x = blmrv, y = scale(AWND), p = 1, kernel = 'uni'))
balout6 = with(la_covid, rdrobust(x = blmrv, y = scale(new_case), p = 1, kernel = 'uni'))

df_bal_la = data.frame(
  est = c(balout1$coef[1], balout2$coef[1], balout3$coef[1],
          balout4$coef[1], balout5$coef[1], balout6$coef[1]),
  se = c(balout1$se[3], balout2$se[3], balout3$se[3],
         balout4$se[3], balout5$se[3], balout6$se[3]),
  pv = c(balout1$pv[3], balout2$pv[3], balout3$pv[3],
         balout4$pv[3], balout5$pv[3], balout6$pv[3]),
  outcome = factor(c("311 Calls", "Temp. (Max.)", "Temp. (Min.)",
                     "Precip.", "Wind", "COVID\nCases"),
                   levels = rev(c("Temp. (Max.)", "Temp. (Min.)", "Precip.",
                                  "Wind", "311 Calls", "COVID\nCases")))
)

bplot2 = df_bal_la %>% 
  ggplot() + 
  geom_hline(yintercept = 0) + 
  geom_point(aes(x = outcome, y = est)) + 
  geom_errorbar(aes(x = outcome, 
                    ymin = est - 1.96 * se,
                    ymax = est + 1.96 * se),
                width = 0,
                size = .6) + 
  coord_flip() + 
  ylim(-2, 2) + 
  labs(y = "RDiT Coefficient (BLM Protest)",
       x = "Balance Covariate",
       title = "B. Balance Test (Los Angeles)") + 
  theme_tufte() + 
  theme(axis.text.y = element_text(hjust = 0.5))

# philadelphia

balout1 = with(phl_df_311, rdrobust(x = blmrv, y = scale(count), p = 1, kernel = 'uni'))
balout2 = with(phl_temp, rdrobust(x = blmrv, y = scale(TMAX), p = 1, kernel = 'uni'))
balout3 = with(phl_temp, rdrobust(x = blmrv, y = scale(TMIN), p = 1, kernel = 'uni'))
balout4 = with(phl_temp, rdrobust(x = blmrv, y = scale(PRCP), p = 1, kernel = 'uni'))
balout5 = with(phl_temp, rdrobust(x = blmrv, y = scale(AWND), p = 1, kernel = 'uni'))
balout6 = with(phl_covid, rdrobust(x = blmrv, y = scale(count), p = 1, kernel = 'uni'))

df_bal_phl = data.frame(
  est = c(balout1$coef[1], balout2$coef[1], balout3$coef[1],
          balout4$coef[1], balout5$coef[1], balout6$coef[1]),
  se = c(balout1$se[3], balout2$se[3], balout3$se[3],
         balout4$se[3], balout5$se[3], balout6$se[3]),
  pv = c(balout1$pv[3], balout2$pv[3], balout3$pv[3],
         balout4$pv[3], balout5$pv[3], balout6$pv[3]),
  outcome = factor(c("311 Calls", "Temp. (Max.)", "Temp. (Min.)",
                     "Precip.", "Wind", "COVID\nCases"),
                   levels = rev(c("Temp. (Max.)", "Temp. (Min.)", "Precip.",
                                  "Wind", "311 Calls", "COVID\nCases")))
)

bplot3 = df_bal_phl %>% 
  ggplot() + 
  geom_hline(yintercept = 0) + 
  geom_point(aes(x = outcome, y = est)) + 
  geom_errorbar(aes(x = outcome, 
                    ymin = est - 1.96 * se,
                    ymax = est + 1.96 * se),
                width = 0,
                size = .6) + 
  coord_flip() + 
  ylim(-2, 2) + 
  labs(y = "RDiT Coefficient (BLM Protest)",
       x = "Balance Covariate",
       title = "C. Balance Test (Philadelphia)") + 
  theme_tufte() + 
  theme(axis.text.y = element_text(hjust = 0.5))

# seattle

balout1 = with(sea_311, rdrobust(x = blmrv, y = scale(count), p = 1, kernel = 'uni'))
balout2 = with(sea_temp, rdrobust(x = blmrv, y = scale(TMAX), p = 1, kernel = 'uni'))
balout3 = with(sea_temp, rdrobust(x = blmrv, y = scale(TMIN), p = 1, kernel = 'uni'))
balout4 = with(sea_temp, rdrobust(x = blmrv, y = scale(PRCP), p = 1, kernel = 'uni'))
balout5 = with(sea_temp, rdrobust(x = blmrv, y = scale(AWND), p = 1, kernel = 'uni'))
balout6 = with(sea_covid, rdrobust(x = blmrv, y = scale(case_count), p = 1, kernel = 'uni'))

df_bal_sea = data.frame(
  est = c(balout1$coef[1], balout2$coef[1], balout3$coef[1],
          balout4$coef[1], balout5$coef[1], balout6$coef[1]),
  se = c(balout1$se[3], balout2$se[3], balout3$se[3],
         balout4$se[3], balout5$se[3], balout6$se[3]),
  pv = c(balout1$pv[3], balout2$pv[3], balout3$pv[3],
         balout4$pv[3], balout5$pv[3], balout6$pv[3]),
  outcome = factor(c("311 Calls", "Temp. (Max.)", "Temp. (Min.)",
                     "Precip.", "Wind", "COVID\nCases"),
                   levels = rev(c("Temp. (Max.)", "Temp. (Min.)", "Precip.",
                                  "Wind", "311 Calls", "COVID\nCases")))
)

bplot4 = df_bal_sea %>% 
  ggplot() + 
  geom_hline(yintercept = 0) + 
  geom_point(aes(x = outcome, y = est),
             size = 2) + 
  geom_errorbar(aes(x = outcome, 
                    ymin = est - 1.96 * se,
                    ymax = est + 1.96 * se),
                width = 0,
                size = .6) + 
  coord_flip() + 
  ylim(-2, 2) + 
  labs(y = "RDiT Coefficient (BLM Protest)",
       x = "Balance Covariate",
       title = "D. Balance Test (Seattle)") + 
  theme_tufte() + 
  theme(axis.text.y = element_text(hjust = 0.5))

bplot_grob = arrangeGrob(bplot1, bplot2, bplot3, bplot4, ncol = 2)
ggsave(plot = bplot_grob, filename = "pics/bplotfin.jpeg", dpi = 600,
       width = 8, height = 5)

#### re-estimation with control covariates: austin setup ####

load(file = "data/rpf.RData")

# Austin (Traffic Stops)

rpf_ts = rpf %>% 
  mutate(count = 1) %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE))

rpf_ts$blmrv = 
  rpf_ts$date - min(rpf_ts$date[rpf_ts$blm == 1])

rpf_ts$count_std = 
  (rpf_ts$count - mean(rpf_ts$count, na.rm = TRUE)) / 
  sd(rpf_ts$count, na.rm = TRUE)

rpf_ts_wc = 
  merge(rpf_ts, aus311_ts %>% rename(count311 = count) %>% dplyr::select(date, count311),
        by = 'date')
rpf_ts_wc = 
  merge(rpf_ts_wc, aus_temp %>% dplyr::select(date, TMAX, TMIN, PRCP, AWND),
        by = 'date')
rpf_ts_wc = 
  merge(rpf_ts_wc, aus_covid %>% dplyr::select(date, cases) %>% rename(covidcases = cases),
        by = 'date', all.x = TRUE)

rpf_ts_wc$covidcases = 
  ifelse(is.na(rpf_ts_wc$covidcases), 0, rpf_ts_wc$covidcases)

#### re-estimation with control covariates: Philadelphia setup ####

# loading in data

load(file = "data/ph_stops_ped.RData")
load(file = "data/ph_stops_veh.RData")

# Philly (Pedestrian Stops)

stops_ped_ts = stops_ped %>% 
  mutate(count = 1) %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE)) 

stops_ped_ts$blmrv = 
  stops_ped_ts$date - min(stops_ped_ts$date[stops_ped_ts$blm == 1])

stops_ped_ts = 
  stops_ped_ts %>% filter(date >= as.Date("2018-01-01"))

stops_ped_ts$count_std = 
  (stops_ped_ts$count - mean(stops_ped_ts$count, na.rm = TRUE)) / 
  sd(stops_ped_ts$count, na.rm = TRUE)

stops_ped_phl_ts = stops_ped_ts

# Philly (Traffic Stops)

stops_veh_ts = stops_veh %>% 
  mutate(count = 1) %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE)) 

stops_veh_ts$blmrv = 
  stops_veh_ts$date - min(stops_veh_ts$date[stops_veh_ts$blm == 1])

stops_veh_ts = 
  stops_veh_ts %>% filter(date >= as.Date("2018-01-01"))

stops_veh_ts$count_std = 
  (stops_veh_ts$count - mean(stops_veh_ts$count, na.rm = TRUE)) / 
  sd(stops_veh_ts$count, na.rm = TRUE)

stops_veh_phl_ts = stops_veh_ts

# merging 

stops_veh_phl_ts_wc = 
  merge(stops_veh_phl_ts, phl_df_311 %>% rename(count311 = count) %>% 
          dplyr::select(date, count311),
        by = 'date')
stops_veh_phl_ts_wc = 
  merge(stops_veh_phl_ts_wc, phl_temp %>% 
          dplyr::select(date, TMAX, TMIN, PRCP, AWND),
        by = 'date')
stops_veh_phl_ts_wc = 
  merge(stops_veh_phl_ts_wc, phl_covid %>% dplyr::select(date, count) %>% 
          rename(covidcases = count),
        by = 'date', all.x = TRUE)
stops_veh_phl_ts_wc$covidcases = 
  ifelse(is.na(stops_veh_phl_ts_wc$covidcases), 0, stops_veh_phl_ts_wc$covidcases)

stops_ped_phl_ts_wc = 
  merge(stops_ped_phl_ts, phl_df_311 %>% rename(count311 = count) %>% 
          dplyr::select(date, count311),
        by = 'date')
stops_ped_phl_ts_wc = 
  merge(stops_ped_phl_ts_wc, phl_temp %>% 
          dplyr::select(date, TMAX, TMIN, PRCP, AWND),
        by = 'date')
stops_ped_phl_ts_wc = 
  merge(stops_ped_phl_ts_wc, phl_covid %>% dplyr::select(date, count) %>% 
          rename(covidcases = count),
        by = 'date', all.x = TRUE)
stops_ped_phl_ts_wc$covidcases = 
  ifelse(is.na(stops_ped_phl_ts_wc$covidcases), 0, stops_ped_phl_ts_wc$covidcases)

#### re-estimation with control covariates: los angeles setup ####

# stops 

load("data/stopdata_la.RData")

# Los Angeles (Pedestrian Stops) 

stops_ped_ts = 
  stopdat1 %>% 
  filter(traffic_stop == 0) %>% 
  mutate(count = 1) %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE))

stops_ped_ts$blmrv = 
  stops_ped_ts$date - min(stops_ped_ts$date[stops_ped_ts$blm == 1])

stops_ped_ts$count_std = 
  (stops_ped_ts$count - mean(stops_ped_ts$count, na.rm = TRUE)) / 
  sd(stops_ped_ts$count, na.rm = TRUE)

# Los Angeles (Traffic Stops) 

stops_veh_ts = stopdat1 %>% 
  filter(traffic_stop == 1) %>% 
  mutate(count = 1) %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE)) 

stops_veh_ts$blmrv = 
  stops_veh_ts$date - min(stops_veh_ts$date[stops_veh_ts$blm == 1])

stops_veh_ts$count_std = 
  (stops_veh_ts$count - mean(stops_veh_ts$count, na.rm = TRUE)) / 
  sd(stops_veh_ts$count, na.rm = TRUE)

# merging 

stops_veh_ts_wc = 
  merge(stops_veh_ts, la_df_311 %>% rename(count311 = count) %>% dplyr::select(date, count311),
        by = 'date')
stops_veh_ts_wc = 
  merge(stops_veh_ts_wc, la_temp %>% dplyr::select(date, TMAX, TMIN, PRCP, AWND),
        by = 'date')
stops_veh_ts_wc = 
  merge(stops_veh_ts_wc, la_covid %>% dplyr::select(date, new_case) %>% rename(covidcases = new_case),
        by = 'date', all.x = TRUE)
stops_veh_ts_wc$covidcases = 
  ifelse(is.na(stops_veh_ts_wc$covidcases), 0, stops_veh_ts_wc$covidcases)


stops_ped_ts_wc = 
  merge(stops_ped_ts, la_df_311 %>% rename(count311 = count) %>% dplyr::select(date, count311),
        by = 'date')
stops_ped_ts_wc = 
  merge(stops_ped_ts_wc, la_temp %>% dplyr::select(date, TMAX, TMIN, PRCP, AWND),
        by = 'date')
stops_ped_ts_wc = 
  merge(stops_ped_ts_wc, la_covid %>% dplyr::select(date, new_case) %>% rename(covidcases = new_case),
        by = 'date', all.x = TRUE)

stops_ped_ts_wc$covidcases = 
  ifelse(is.na(stops_ped_ts_wc$covidcases), 0, stops_ped_ts_wc$covidcases)

#### re-estimation with control covariates: seattle setup ####

load("data/terry.RData")

terry = terry %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-29"), 1, 0)) %>% 
  mutate(initiate_off = ifelse(call_type == "-" | call_type == "ONVIEW" | 
                                 call_type == "SCHEDULED EVENT (RECURRING)" | 
                                 call_type == "ALARM CALL (NOT POLICE ALARM)", 1, 0),
         initiate_civ = 
           ifelse(call_type == "911" | 
                    call_type == "TELEPHONE OTHER, NOT 911" | 
                    call_type == "TEXT MESSAGE", 1, 0))

terry_desc = terry %>% 
  filter(initiate_off == 1) %>% 
  mutate(count = 1) %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm)) 

terry_desc$blmrv = 
  terry_desc$date - min(terry_desc$date[terry_desc$blm == 1])

terry_desc$count_std = 
  (terry_desc$count - mean(terry_desc$count, na.rm = TRUE)) / 
  sd(terry_desc$count, na.rm = TRUE)

terry_desc_wc = 
  merge(terry_desc, 
      sea_311 %>% rename(count311 = count) %>% dplyr::select(date, count311),
      by = 'date')
terry_desc_wc = merge(terry_desc_wc, sea_temp %>% dplyr::select(1:5), by = 'date')
terry_desc_wc = merge(terry_desc_wc, (sea_covid %>% 
  rename(countcovid = case_count) %>% 
  dplyr::select(date, countcovid)), by = 'date', all.x = TRUE)

terry_desc_wc$countcovid = 
  ifelse(is.na(terry_desc_wc$countcovid), 0, terry_desc_wc$countcovid)

#### estimation with control covariates: depolicing ####

reest1 = with(rpf_ts_wc, rdrobust(x = blmrv, y = scale(count),
                                  p = 1, kernel = 'uni',
                         covs = cbind(rpf_ts_wc$count311,
                                      rpf_ts_wc$count_covid,
                                      rpf_ts_wc$TMAX,
                                      rpf_ts_wc$TMIN,
                                      rpf_ts_wc$PRCP,
                                      rpf_ts_wc$AWND)))

reest2 = with(stops_ped_ts_wc, rdrobust(x = blmrv, y = scale(count),
                                        p = 1, kernel = 'uni',
                         covs = cbind(stops_ped_ts_wc$count311,
                                      stops_ped_ts_wc$count_covid,
                                      stops_ped_ts_wc$TMAX,
                                      stops_ped_ts_wc$TMIN,
                                      stops_ped_ts_wc$PRCP,
                                      stops_ped_ts_wc$AWND)))

reest3 = with(stops_veh_ts_wc, rdrobust(x = blmrv, y = scale(count),
                                        p = 1, kernel = 'uni',
                               covs = cbind(stops_veh_ts_wc$count311,
                                            stops_veh_ts_wc$count_covid,
                                            stops_veh_ts_wc$TMAX,
                                            stops_veh_ts_wc$TMIN,
                                            stops_veh_ts_wc$PRCP,
                                            stops_veh_ts_wc$AWND)))

reest4 = with(stops_ped_phl_ts_wc, rdrobust(x = blmrv, y = scale(count),
                                            p = 1, kernel = 'uni',
                               covs = cbind(stops_ped_phl_ts_wc$count311,
                                            stops_ped_phl_ts_wc$count_covid,
                                            stops_ped_phl_ts_wc$TMAX,
                                            stops_ped_phl_ts_wc$TMIN,
                                            stops_ped_phl_ts_wc$PRCP,
                                            stops_ped_phl_ts_wc$AWND)))

reest5 = with(stops_veh_phl_ts_wc, rdrobust(x = blmrv, y = scale(count),
                                            p = 1, kernel = 'uni',
                               covs = cbind(stops_veh_phl_ts_wc$count311,
                                            stops_veh_phl_ts_wc$count_covid,
                                            stops_veh_phl_ts_wc$TMAX,
                                            stops_veh_phl_ts_wc$TMIN,
                                            stops_veh_phl_ts_wc$PRCP,
                                            stops_veh_phl_ts_wc$AWND)))

reest6 = with(terry_desc_wc, rdrobust(x = blmrv, y = scale(count), 
                                      p = 1, kernel = 'uni',
                             covs = cbind(terry_desc_wc$count311,
                                          terry_desc_wc$count_covid,
                                          terry_desc_wc$TMAX,
                                          terry_desc_wc$TMIN,
                                          terry_desc_wc$PRCP,
                                          terry_desc_wc$AWND)))

df_coef_plot = 
  data.frame(
    
    est = c(reest1$coef[1], reest2$coef[1], reest3$coef[1],
            reest4$coef[1], reest5$coef[1], reest6$coef[1]),
    
    se = c(reest1$se[1], reest2$se[1], reest3$se[1],
           reest4$se[1], reest5$se[1], reest6$se[1]),
    
    pv = c(reest1$pv[1], reest2$pv[1], reest3$pv[1],
           reest4$pv[1], reest5$pv[1], reest6$pv[1]),
    
    city = c("AUS", "LA", "LA", "PHL", "PHL", "SEA"),
    
    outcome = factor(c("Traffic Stops", "Pedestrian Stops", "Traffic Stops",
                       "Pedestrian Stops", "Traffic Stops", "Terry Stops"),
                     levels = c("Traffic Stops", "Pedestrian Stops", "Terry Stops"))
    
  )


df_coef_plot_meta = 
  df_coef_plot %>% 
  filter(outcome != "Officer Calls") %>% 
  group_by(city) %>% 
  summarize(est = mean(est),
            se = mean(se))

meta_res = 
  metagen(TE = df_coef_plot_meta$est,
          seTE = df_coef_plot_meta$se, 
          sm = "SMD")


df_coef_plot_plot = df_coef_plot %>% 
  filter(outcome != "Officer Calls") %>% 
  bind_rows(., 
            data.frame(
              est = meta_res$TE.random,
              se = meta_res$seTE.random,
              pv = meta_res$pval.random,
              city = "Meta-Analysis",
              outcome = "Meta-Analysis"
            )
  ) %>% 
  mutate(city = factor(city,
                       levels = c("AUS",  "CHI", "LA", "PHL", "SEA", "Meta-Analysis")),
         outcome = factor(outcome,
                          levels = c("Traffic Stops", "Pedestrian Stops",
                                     "Terry Stops", "Meta-Analysis")))

df_coef_plot_plot %>% 
  ggplot() + 
  geom_hline(yintercept = 0) + 
  geom_point(aes(x = city, y = est, shape = outcome),
             position = position_dodge(.35)) +
  geom_errorbar(aes(x = city, 
                    ymin = est - 1.96 * se,
                    ymax = est + 1.96 * se,
                    shape = outcome),
                width = 0,
                position = position_dodge(.35),
                size = .2) + 
  geom_errorbar(aes(x = city, 
                    ymin = est - 1.645 * se,
                    ymax = est + 1.645 * se,
                    shape = outcome),
                width = 0,
                position = position_dodge(.35),
                size = .4) + 
  labs(x = "City", y = "RDiT Coefficient\n(BLM Protest)",
       shape = "Outcome") + 
  theme_tufte(base_size = 9)

# analysis 

ggsave(plot = last_plot(), width = 6, height = 2, filename = "pics/figure2reswc.jpeg",
       dpi = 600)

#### estimation with control covariates: other outcomes (arrest rates) ####

# key results we hang our hats on

# 1) Arrest rates: Philly + Seattle
# 2) Rate ratios: Philly + Seattle 
# 3) No effects on crime 

# austin 

load(file = "data/rpf.RData")

hit_rate_traffic_aus = rpf %>% 
  mutate(count = 1) %>% 
  mutate(hit = search_found,
         hit_alc = search_found_alc) %>% 
  mutate(arrest_sum = arrest) %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            search_found_cash_m = mean(search_found_cash, na.rm = TRUE),
            search_found_substances_m = mean(search_found_substances, na.rm = TRUE),
            search_found_weapons_m = mean(search_found_weapons, na.rm = TRUE),
            search_found_cash_s = sum(search_found_cash, na.rm = TRUE),
            search_found_substances_s = sum(search_found_substances, na.rm = TRUE),
            search_found_weapons_s = sum(search_found_weapons, na.rm = TRUE),
            hit = mean(search_found, na.rm = TRUE),
            hit_alc = mean(hit_alc, na.rm = TRUE),
            arrest = mean(arrest, na.rm = TRUE),
            arrest_sum = sum(arrest_sum, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE))

hit_rate_traffic_aus$blmrv = 
  hit_rate_traffic_aus$date - 
  min(hit_rate_traffic_aus$date[hit_rate_traffic_aus$blm == 1])

# los angeles 

load("data/stopdata_la.RData")

# ped hit rate

hit_rate_ped_la = stopdat1 %>% 
  filter(reason_for_stop != "Traffic violation") %>% 
  mutate(count = 1) %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            ped_contraband = mean(contraband_recovered, na.rm = TRUE),
            ped_contraband_alc = mean(contraband_recovered_alc, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            arrest = mean(arrest, na.rm = TRUE)) 

hit_rate_ped_la$blmrv = 
  hit_rate_ped_la$date - min(hit_rate_ped_la$date[hit_rate_ped_la$blm == 1])

# vehicle hit rate

hit_rate_veh_la = stopdat1 %>% 
  filter(reason_for_stop == "Traffic violation") %>% 
  mutate(count = 1) %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            veh_contraband = mean(contraband_recovered, na.rm = TRUE),
            veh_contraband_alc = mean(contraband_recovered_alc, na.rm = TRUE),
            veh_contraband_nalc = mean(contraband_recovered_nalc, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            arrest_m = mean(arrest, na.rm = TRUE),
            arrest_sum = sum(arrest, na.rm = TRUE),
            arrest_felony_m = mean(arrest_felony, na.rm = TRUE),
            arrest_infraction_m = mean(arrest_infraction, na.rm = TRUE),
            arrest_misdemeanor_m = mean(arrest_misdemeanor, na.rm = TRUE),
            arrest_felony_s = sum(arrest_felony, na.rm = TRUE),
            arrest_infraction_s = sum(arrest_infraction, na.rm = TRUE),
            arrest_misdemeanor_s = sum(arrest_misdemeanor, na.rm = TRUE),
            contraband_recovered_evidence_s = sum(contraband_recovered_evidence, na.rm = TRUE),
            contraband_recovered_evidence_m = mean(contraband_recovered_evidence, na.rm = TRUE),
            contraband_recovered_substances_s = sum(contraband_recovered_substances, na.rm = TRUE),
            contraband_recovered_substances_m = mean(contraband_recovered_substances, na.rm = TRUE),
            contraband_recovered_weapons_s = sum(contraband_recovered_weapons, na.rm = TRUE),
            contraband_recovered_weapons_m = mean(contraband_recovered_weapons, na.rm = TRUE)) %>% 
  rename(arrest = arrest_m)

hit_rate_veh_la$blmrv = 
  hit_rate_veh_la$date - min(hit_rate_veh_la$date[hit_rate_veh_la$blm == 1])

# Philly stops 

load(file = "data/ph_stops_ped.RData")
load(file = "data/ph_stops_veh.RData")

# hit rates (pedestrian)

hit_rate_ped_phl = stops_ped %>% 
  mutate(count = 1) %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            ped_contraband = mean(ped_contraband, na.rm = TRUE),
            arrest = mean(individual_arrested, na.rm = TRUE),
            arrest_sum = sum(individual_arrested, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE))

hit_rate_ped_phl$blmrv = 
  hit_rate_ped_phl$date - min(hit_rate_ped_phl$date[hit_rate_ped_phl$blm == 1])

# hit rates (vehicle)

hit_rate_veh_phl = stops_veh %>% 
  mutate(count = 1) %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            veh_contraband = mean(veh_contraband, na.rm = TRUE),
            arrest = mean(individual_arrested, na.rm = TRUE),
            arrest_sum = sum(individual_arrested, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE)) 

hit_rate_veh_phl$blmrv = 
  hit_rate_veh_phl$date - min(hit_rate_veh_phl$date[hit_rate_veh_phl$blm == 1])

# Seattle Terry Stop Data

load("data/terry.RData")

terry = terry %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-29"), 1, 0)) %>% 
  mutate(initiate_off = ifelse(call_type == "-" | call_type == "ONVIEW" | 
                                 call_type == "SCHEDULED EVENT (RECURRING)" | 
                                 call_type == "ALARM CALL (NOT POLICE ALARM)", 1, 0),
         initiate_civ = 
           ifelse(call_type == "911" | 
                    call_type == "TELEPHONE OTHER, NOT 911" | 
                    call_type == "TEXT MESSAGE", 1, 0))

hit_rate_sea = terry %>% 
  mutate(count = 1) %>% 
  filter(initiate_off == 1) %>% 
  group_by(date) %>% 
  mutate(hit = ifelse(stop_resolution == "Arrest" | 
                        stop_resolution == "Citation / Infraction" | 
                        stop_resolution == "Offense Report" | 
                        stop_resolution == "Referred for Prosecution", 1, 0)) %>% 
  summarize(hit = mean(hit, na.rm = TRUE),
            count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            arrest_m = mean(arrest2, na.rm = TRUE),
            arrest_sum = sum(arrest2, na.rm = TRUE)) %>% 
  rename(arrest = arrest_m)

hit_rate_sea$blmrv = 
  hit_rate_sea$date - min(hit_rate_sea$date[hit_rate_sea$blm == 1])

# merging austin data

hit_rate_traffic_aus_wc = 
  merge(hit_rate_traffic_aus, 
        aus311_ts %>% rename(count311 = count) %>% dplyr::select(date, count311),
        by = 'date')

hit_rate_traffic_aus_wc = merge(hit_rate_traffic_aus_wc,
                        aus_temp %>% dplyr::select(1:5), by = 'date')
hit_rate_traffic_aus_wc = merge(hit_rate_traffic_aus_wc, 
                        (aus_covid %>% 
                           rename(countcovid = cases) %>% 
                           dplyr::select(date, countcovid)), by = 'date',
                        all.x = TRUE)
hit_rate_traffic_aus_wc$countcovid = 
  ifelse(is.na(hit_rate_traffic_aus_wc$countcovid), 0, hit_rate_traffic_aus_wc$countcovid)

# merging los angeles data 

hit_rate_veh_la_wc = 
  merge(hit_rate_veh_la, 
        la_df_311 %>% rename(count311 = count) %>% dplyr::select(date, count311),
        by = 'date')

hit_rate_veh_la_wc = merge(hit_rate_veh_la_wc,
                                la_temp %>% dplyr::select(1:5), by = 'date')
hit_rate_veh_la_wc = merge(hit_rate_veh_la_wc, 
                                (la_covid %>% 
                                   rename(countcovid = new_case) %>% 
                                   dplyr::select(date, countcovid)), by = 'date',
                                all.x = TRUE)
hit_rate_veh_la_wc$countcovid = 
  ifelse(is.na(hit_rate_veh_la_wc$countcovid), 0, hit_rate_veh_la_wc$countcovid)

# merging seattle data

hit_rate_sea_wc = 
  merge(hit_rate_sea, 
        sea_311 %>% rename(count311 = count) %>% dplyr::select(date, count311),
        by = 'date')
hit_rate_sea_wc = merge(hit_rate_sea_wc,
                      sea_temp %>% dplyr::select(1:5), by = 'date')
hit_rate_sea_wc = merge(hit_rate_sea_wc, 
                      (sea_covid %>% 
                         rename(countcovid = case_count) %>% 
                         dplyr::select(date, countcovid)), by = 'date',
                      all.x = TRUE)
hit_rate_sea_wc$countcovid = 
  ifelse(is.na(hit_rate_sea_wc$countcovid), 0, hit_rate_sea_wc$countcovid)

# merging philly data

hit_rate_veh_phl_wc = 
  merge(hit_rate_veh_phl, phl_df_311 %>% rename(count311 = count) %>% 
          dplyr::select(date, count311),
        by = 'date')

hit_rate_veh_phl_wc = 
  merge(hit_rate_veh_phl_wc, phl_temp %>% 
          dplyr::select(date, TMAX, TMIN, PRCP, AWND),
        by = 'date')

hit_rate_veh_phl_wc = 
  merge(hit_rate_veh_phl_wc, phl_covid %>% dplyr::select(date, count) %>% 
          rename(covidcases = count),
        by = 'date', all.x = TRUE)

hit_rate_veh_phl_wc$countcovid = 
  ifelse(is.na(hit_rate_veh_phl_wc$covidcases), 0, 
         hit_rate_veh_phl_wc$covidcases)


reest5 = with(hit_rate_traffic_aus_wc, rdrobust(x = blmrv, y = arrest, 
                                            p = 1, kernel = 'uni',
                                            covs = cbind(hit_rate_traffic_aus_wc$count311,
                                                         hit_rate_traffic_aus_wc$TMAX,
                                                         hit_rate_traffic_aus_wc$TMIN,
                                                         hit_rate_traffic_aus_wc$PRCP,
                                                         hit_rate_traffic_aus_wc$AWND,
                                                         hit_rate_traffic_aus_wc$countcovid)))

reest6 = with(hit_rate_veh_la_wc, rdrobust(x = blmrv, y = arrest, 
                                                p = 1, kernel = 'uni',
                                                covs = cbind(hit_rate_veh_la_wc$count311,
                                                             hit_rate_veh_la_wc$TMAX,
                                                             hit_rate_veh_la_wc$TMIN,
                                                             hit_rate_veh_la_wc$PRCP,
                                                             hit_rate_veh_la_wc$AWND,
                                                             hit_rate_veh_la_wc$countcovid)))


reest7 = with(hit_rate_veh_phl_wc, rdrobust(x = blmrv, y = arrest, 
                                       p = 1, kernel = 'uni',
                                       covs = cbind(hit_rate_veh_phl_wc$count311,
                                                    hit_rate_veh_phl_wc$TMAX,
                                                    hit_rate_veh_phl_wc$TMIN,
                                                    hit_rate_veh_phl_wc$PRCP,
                                                    hit_rate_veh_phl_wc$AWND,
                                                    hit_rate_veh_phl_wc$countcovid)))

reest8 = with(hit_rate_sea_wc, rdrobust(x = blmrv, y = arrest, 
                             p = 1, kernel = 'uni',
                             covs = cbind(hit_rate_sea_wc$count311,
                                          hit_rate_sea_wc$TMAX,
                                          hit_rate_sea_wc$TMIN,
                                          hit_rate_sea_wc$PRCP,
                                          hit_rate_sea_wc$AWND,
                                          hit_rate_sea_wc$countcovid)))


data.frame(
  est = c(reest5$coef[1], reest6$coef[1],
          reest7$coef[1], reest8$coef[1]),
  se = c(reest5$se[3], reest6$se[3],
         reest7$se[3], reest8$se[3]),
  city = c("AUS", "LA", "PHL", "SEA")
) %>% 
  ggplot() + 
  geom_hline(yintercept = 0) + 
  geom_point(aes(x = city, y = est),
             size = 2.5) + 
  geom_errorbar(aes(x = city, 
                    ymin = est - 1.96 * se,
                    ymax = est + 1.96 * se),
                width = 0,
                size = .6) + 
  geom_errorbar(aes(x = city, 
                    ymin = est - 1.645 * se,
                    ymax = est + 1.645 * se),
                width = 0,
                size = .8) + 
  labs(x = "City", y = "RDiT Coefficient (BLM Protest)",
       title = 'Effect of BLM Protest on Arrest Rate\nConditional on Controls') + 
  theme_tufte()

ggsave(plot = last_plot(), width = 4, height = 3,
       filename = 'pics/arrestrreswc.jpeg',
       dpi = 600)

#### estimation with control covariates: other outcomes (rate ratios) ####

# loading in data

load(file = "data/ph_stops_veh.RData")

# rate ratio (vehicle)

phl_rr_veh = stops_veh %>% 
  group_by(date) %>% 
  summarize(r_blk = sum(r_blk),
            r_lat = sum(r_lat),
            r_wht = sum(r_wht),
            blm = mean(blm)) %>% 
  mutate(r_blk_rate = (r_blk / 614254),
         r_wht_rate = (r_wht / 550102),
         r_lat_rate = (r_lat / 238965)) %>% 
  mutate(veh_rr_bw = r_blk_rate / r_wht_rate,
         veh_rr_lw = r_lat_rate / r_wht_rate) %>% 
  mutate(veh_rr_bw = ifelse(veh_rr_bw == Inf | veh_rr_bw == -Inf, NA, veh_rr_bw),
         veh_rr_lw = ifelse(veh_rr_lw == Inf | veh_rr_lw == -Inf, NA, veh_rr_lw))

phl_rr_veh$blmrv = 
  phl_rr_veh$date - min(phl_rr_veh$date[phl_rr_veh$blm == 1])

phl_rr_veh_wc = 
  merge(phl_rr_veh, (phl_df_311 %>% rename(count311 = count) %>% 
          dplyr::select(date, count311)),
        by = 'date')

phl_temp2m = (phl_temp %>% 
    dplyr::select(date, TMAX, TMIN, PRCP, AWND) %>% 
    mutate(date = as.Date(date)))

phl_rr_veh_wc = 
  merge(phl_rr_veh_wc, phl_temp2m, by = 'date')

phl_rr_veh_wc = 
  merge(phl_rr_veh_wc, phl_covid %>% dplyr::select(date, count) %>% 
          rename(covidcases = count), all.x = TRUE)

phl_rr_veh_wc$countcovid = 
  ifelse(is.na(phl_rr_veh_wc$covidcases), 0, 
         phl_rr_veh_wc$covidcases)

reest9 = with(phl_rr_veh_wc, rdrobust(x = blmrv, y = veh_rr_bw, 
                                        p = 1, kernel = 'uni',
                                        covs = cbind(phl_rr_veh_wc$count311,
                                                     phl_rr_veh_wc$TMAX,
                                                     phl_rr_veh_wc$TMIN,
                                                     phl_rr_veh_wc$PRCP,
                                                     phl_rr_veh_wc$AWND,
                                                     phl_rr_veh_wc$countcovid)))

load("data/terry.RData")

terry = terry %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-29"), 1, 0)) %>% 
  mutate(initiate_off = ifelse(call_type == "-" | call_type == "ONVIEW" | 
                                 call_type == "SCHEDULED EVENT (RECURRING)" | 
                                 call_type == "ALARM CALL (NOT POLICE ALARM)", 1, 0),
         initiate_civ = 
           ifelse(call_type == "911" | 
                    call_type == "TELEPHONE OTHER, NOT 911" | 
                    call_type == "TEXT MESSAGE", 1, 0))

sea_rr = 
  terry %>% 
  mutate(count = 1,
         r_unk = ifelse(subject_race == "Unknown" | subject_race == "-", 1, 0),
         r_wht = ifelse(subject_race == "White", 1, 0),
         r_asn = ifelse(subject_race == "Asian", 1, 0),
         r_blk = ifelse(subject_race == "Black or African American", 1, 0)) %>% 
  filter(initiate_off == 1) %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            arrest = mean(arrest, na.rm = TRUE),
            r_wht = sum(r_wht, na.rm = TRUE),
            r_blk = sum(r_blk, na.rm = TRUE)) %>% 
  filter(date >= as.Date("2019-06-01")) %>% 
  mutate(wht_rate = (r_wht / (.673 * 737015)) * 10000,
         blk_rate = (r_blk / (.073 * 737015)) * 10000) %>% 
  mutate(rr_bw = blk_rate / wht_rate) %>% 
  mutate(rr_bw = ifelse(rr_bw == Inf | is.na(rr_bw), NA, rr_bw)) 

sea_rr$blmrv = 
  sea_rr$date - min(sea_rr$date[sea_rr$blm == 1])

sea_rr_wc = 
  merge(sea_rr, 
        sea_311 %>% rename(count311 = count) %>% dplyr::select(date, count311),
        by = 'date')
sea_rr_wc = merge(sea_rr_wc, sea_temp %>% dplyr::select(1:5), by = 'date')
sea_rr_wc = merge(sea_rr_wc, (sea_covid %>% 
                                        rename(countcovid = case_count) %>% 
                                        dplyr::select(date, countcovid)), by = 'date', all.x = TRUE)

sea_rr_wc$countcovid = 
  ifelse(is.na(sea_rr_wc$countcovid), 0, sea_rr_wc$countcovid)


reest10 = with(sea_rr_wc, rdrobust(x = blmrv, y = rr_bw, 
                                      p = 1, kernel = 'tri',
                                      covs = cbind(sea_rr_wc$count311,
                                                   sea_rr_wc$TMAX,
                                                   sea_rr_wc$TMIN,
                                                   sea_rr_wc$PRCP,
                                                   sea_rr_wc$AWND,
                                                   sea_rr_wc$countcovid)))


data.frame(
  est = c(reest9$coef[3], reest10$coef[1]),
  se = c(reest9$se[3], reest10$se[3]),
  city = c("PHL", "SEA")
) %>% 
  ggplot() + 
  geom_hline(yintercept = 0) + 
  geom_point(aes(x = city, y = est),
             size = 2.5) + 
  geom_errorbar(aes(x = city, 
                    ymin = est - 1.96 * se,
                    ymax = est + 1.96 * se),
                width = 0,
                size = .6) + 
  geom_errorbar(aes(x = city, 
                    ymin = est - 1.645 * se,
                    ymax = est + 1.645 * se),
                width = 0,
                size = .8) + 
  labs(x = "City", y = "RDiT Coefficient (BLM Protest)",
       title = 'Effect of BLM Protest on B/W Rate Ratio\nConditional on Controls') + 
  theme_tufte()

ggsave(plot = last_plot(), width = 4, height = 3,
       filename = 'pics/bwrrwc.jpeg',
       dpi = 600)

#### estimation with control covariates: other outcomes (crime) ####

#### loading in crime data: aus #### 

load(file = "data/crimeatx.RData")

crime_aus = crime_atx %>% 
  mutate(crime_person = ifelse(crime_category == "person", 1, 0),
         crime_property = ifelse(crime_category == "property", 1, 0),
         crime_society = ifelse(crime_category == "society", 1, 0)) %>% 
  group_by(date) %>% 
  summarize(blm = mean(blm, na.rm = TRUE),
            crime_person = sum(crime_person, na.rm = TRUE),
            crime_property = sum(crime_property, na.rm = TRUE),
            crime_society = sum(crime_society, na.rm = TRUE)) 

crime_aus$blmrv = 
  crime_aus$date - min(crime_aus$date[crime_aus$blm == 1])

crime_aus_wc = 
  merge(crime_aus, aus311_ts %>% rename(count311 = count) %>% dplyr::select(date, count311),
        by = 'date')
crime_aus_wc = 
  merge(crime_aus_wc, aus_temp %>% dplyr::select(date, TMAX, TMIN, PRCP, AWND),
        by = 'date')
crime_aus_wc = 
  merge(crime_aus_wc, aus_covid %>% dplyr::select(date, cases) %>% rename(covidcases = cases),
        by = 'date', all.x = TRUE)

crime_aus_wc$covidcases = 
  ifelse(is.na(crime_aus_wc$covidcases), 0, crime_aus_wc$covidcases)

#### loading in crime data: los angeles ####

# crime

load("data/crimedat_la.RData")

# cleaning data

# crime 

crime_la = crime_f %>% 
  mutate(date = as.Date(date)) %>% 
  mutate(crime_violent = ifelse(crime_type == "violent", 1, 0),
         crime_property = ifelse(crime_type == "property", 1, 0),
         crime_society = ifelse(crime_type == "society", 1, 0)) %>% 
  dplyr::group_by(date) %>% 
  summarize(crime_violent = sum(crime_violent),
            crime_property = sum(crime_property),
            crime_society = sum(crime_society),
            blm = mean(blm)) 

crime_la$blmrv = 
  crime_la$date - min(crime_la$date[crime_la$blm == 1])

crime_la_wc = 
  merge(crime_la, la_df_311 %>% rename(count311 = count) %>% dplyr::select(date, count311),
        by = 'date')
crime_la_wc = 
  merge(crime_la_wc, la_temp %>% dplyr::select(date, TMAX, TMIN, PRCP, AWND),
        by = 'date')
crime_la_wc = 
  merge(crime_la_wc, la_covid %>% dplyr::select(date, new_case) %>% rename(covidcases = new_case),
        by = 'date', all.x = TRUE)
crime_la_wc$covidcases = 
  ifelse(is.na(crime_la_wc$covidcases), 0, crime_la_wc$covidcases)


#### loading in crime data: philadelphia ####

load(file = "data/crimeph.RData")

crime_phi = ph_crime_df %>% 
  mutate(date = as.Date(dispatch_date)) %>% 
  dplyr::group_by(date) %>% 
  summarize(crime_violent = sum(crime_violent),
            crime_property = sum(crime_property),
            crime_society = sum(crime_society),
            blm = mean(blm)) 

crime_phi$blmrv = 
  crime_phi$date - min(crime_phi$date[crime_phi$blm == 1])

crime_phi_wc = 
  merge(crime_phi, phl_df_311 %>% rename(count311 = count) %>% 
          dplyr::select(date, count311),
        by = 'date')
crime_phi_wc = 
  merge(crime_phi_wc, phl_temp %>% 
          dplyr::select(date, TMAX, TMIN, PRCP, AWND),
        by = 'date')
crime_phi_wc = 
  merge(crime_phi_wc, phl_covid %>% dplyr::select(date, count) %>% 
          rename(covidcases = count),
        by = 'date', all.x = TRUE)
crime_phi_wc$covidcases = 
  ifelse(is.na(crime_phi_wc$covidcases), 0, crime_phi_wc$covidcases)

#### loading in crime data: seattle ####

load(file = "data/crimedata.RData")

crime = 
  crime %>% 
  mutate(date = as.Date(paste(substring(report_date, 7, 10),
                              substring(report_date, 1, 2),
                              substring(report_date, 4, 5), sep = "-"))) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-29"), 1, 0)) %>% 
  mutate(cat_society = ifelse(crime_against_category == "SOCIETY", 1, 0),
         cat_property = ifelse(crime_against_category == "PROPERTY", 1, 0),
         cat_person = ifelse(crime_against_category == "PERSON", 1, 0),
         cat_notcrime = ifelse(crime_against_category == "NOT_A_CRIME", 1, 0)) %>% 
  filter(offense != "Identity Theft")

crime_sea = crime %>% 
  group_by(date) %>% 
  summarize(cat_person = sum(cat_person),
            cat_property = sum(cat_property),
            cat_society = sum(cat_society), 
            blm = mean(blm))

crime_sea$blmrv = 
  crime_sea$date - min(crime_sea$date[crime_sea$blm == 1])


crime_sea_wc = 
  merge(crime_sea, 
        sea_311 %>% rename(count311 = count) %>% dplyr::select(date, count311),
        by = 'date')
crime_sea_wc = 
  merge(crime_sea_wc, sea_temp %>% dplyr::select(1:5), by = 'date')
crime_sea_wc = 
  merge(crime_sea_wc, (sea_covid %>% 
                         rename(countcovid = case_count) %>% 
                                        dplyr::select(date, countcovid)), by = 'date', all.x = TRUE)

crime_sea_wc$countcovid = 
  ifelse(is.na(crime_sea_wc$countcovid), 0, crime_sea_wc$countcovid)

#### estimation: crime #### 

# against society crime

asc1 = 
  with(crime_aus_wc, rdrobust(x = blmrv, y = scale(crime_society),
                            covs = cbind(crime_aus_wc$count311,
                                         crime_aus_wc$TMAX,
                                         crime_aus_wc$TMIN,
                                         crime_aus_wc$PRCP,
                                         crime_aus_wc$AWND,
                                         crime_aus_wc$countcovid), p = 1,
                            kernel = 'uni'))

asc2 = 
  with(crime_la_wc, rdrobust(x = blmrv, y = scale(crime_society),
                              covs = cbind(crime_la_wc$count311,
                                           crime_la_wc$TMAX,
                                           crime_la_wc$TMIN,
                                           crime_la_wc$PRCP,
                                           crime_la_wc$AWND,
                                           crime_la_wc$countcovid), p = 1,
                              kernel = 'uni'))

asc3 = 
  with(crime_phi_wc, rdrobust(x = blmrv, y = scale(crime_society),
                             covs = cbind(crime_phi_wc$count311,
                                          crime_phi_wc$TMAX,
                                          crime_phi_wc$TMIN,
                                          crime_phi_wc$PRCP,
                                          crime_phi_wc$AWND,
                                          crime_phi_wc$countcovid), p = 1,
                             kernel = 'uni'))

asc4 = 
  with(crime_sea_wc, rdrobust(x = blmrv, y = scale(cat_society),
                              covs = cbind(crime_sea_wc$count311,
                                           crime_sea_wc$TMAX,
                                           crime_sea_wc$TMIN,
                                           crime_sea_wc$PRCP,
                                           crime_sea_wc$AWND,
                                           crime_sea_wc$countcovid), p = 1,
                              kernel = 'uni'))


# property crime 

apc1 = 
  with(crime_aus_wc, rdrobust(x = blmrv, y = scale(crime_property),
                              covs = cbind(crime_aus_wc$count311,
                                           crime_aus_wc$TMAX,
                                           crime_aus_wc$TMIN,
                                           crime_aus_wc$PRCP,
                                           crime_aus_wc$AWND,
                                           crime_aus_wc$countcovid), p = 1,
                              kernel = 'uni'))

apc2 = 
  with(crime_la_wc, rdrobust(x = blmrv, y = scale(crime_property),
                             covs = cbind(crime_la_wc$count311,
                                          crime_la_wc$TMAX,
                                          crime_la_wc$TMIN,
                                          crime_la_wc$PRCP,
                                          crime_la_wc$AWND,
                                          crime_la_wc$countcovid), p = 1,
                             kernel = 'uni'))

apc3 = 
  with(crime_phi_wc, rdrobust(x = blmrv, y = scale(crime_property),
                              covs = cbind(crime_phi_wc$count311,
                                           crime_phi_wc$TMAX,
                                           crime_phi_wc$TMIN,
                                           crime_phi_wc$PRCP,
                                           crime_phi_wc$AWND,
                                           crime_phi_wc$countcovid), p = 1,
                              kernel = 'uni'))

apc4 = 
  with(crime_sea_wc, rdrobust(x = blmrv, y = scale(cat_property),
                             covs = cbind(crime_sea_wc$count311,
                                          crime_sea_wc$TMAX,
                                          crime_sea_wc$TMIN,
                                          crime_sea_wc$PRCP,
                                          crime_sea_wc$AWND,
                                          crime_sea_wc$countcovid), p = 1,
                             kernel = 'uni'))

# violent crime 

avc1 = 
  with(crime_aus_wc, rdrobust(x = blmrv, y = scale(crime_person),
                              covs = cbind(crime_aus_wc$count311,
                                           crime_aus_wc$TMAX,
                                           crime_aus_wc$TMIN,
                                           crime_aus_wc$PRCP,
                                           crime_aus_wc$AWND,
                                           crime_aus_wc$countcovid), p = 1,
                              kernel = 'uni'))

avc2 = 
  with(crime_la_wc, rdrobust(x = blmrv, y = scale(crime_violent),
                             covs = cbind(crime_la_wc$count311,
                                          crime_la_wc$TMAX,
                                          crime_la_wc$TMIN,
                                          crime_la_wc$PRCP,
                                          crime_la_wc$AWND,
                                          crime_la_wc$countcovid), p = 1,
                             kernel = 'uni'))

avc3 = 
  with(crime_phi_wc, rdrobust(x = blmrv, y = scale(crime_violent),
                              covs = cbind(crime_phi_wc$count311,
                                           crime_phi_wc$TMAX,
                                           crime_phi_wc$TMIN,
                                           crime_phi_wc$PRCP,
                                           crime_phi_wc$AWND,
                                           crime_phi_wc$countcovid), p = 1,
                              kernel = 'uni'))

avc4 = 
  with(crime_sea_wc, rdrobust(x = blmrv, y = scale(cat_person),
                              covs = cbind(crime_sea_wc$count311,
                                           crime_sea_wc$TMAX,
                                           crime_sea_wc$TMIN,
                                           crime_sea_wc$PRCP,
                                           crime_sea_wc$AWND,
                                           crime_sea_wc$countcovid), p = 1,
                              kernel = 'uni'))


# plot 

df_crime_plot = 
  data.frame(
    
    est = c(asc1$coef[1], apc1$coef[1], avc1$coef[1],
            asc2$coef[1], apc2$coef[1], avc2$coef[1],
            asc3$coef[1], apc3$coef[1], avc3$coef[1],
            asc4$coef[1], apc4$coef[1], avc4$coef[1]),
    
    se = c(asc1$se[3], apc1$se[3], avc1$se[3],
           asc2$se[3], apc2$se[3], avc2$se[3],
           asc3$se[3], apc3$se[3], avc3$se[3],
           asc4$se[3], apc4$se[3], avc4$se[3]),
    
    pv = c(asc1$pv[3], apc1$pv[3], avc1$pv[3],
           asc2$pv[3], apc2$pv[3], avc2$pv[3],
           asc3$pv[3], apc3$pv[3], avc3$pv[3],
           asc4$pv[3], apc4$pv[3], avc4$pv[3]),
    
    crime_type = factor(rep(c("Crime (Society)",
                              "Crime (Property)",
                              "Crime (Violent)"), 4),
                        levels = c("Crime (Violent)",
                                   "Crime (Property)",
                                   "Crime (Society)")),
    
    city = factor(c(rep("AUS", 3), rep("LA", 3), rep("PHL", 3), rep("SEA", 3)),
                  levels = c("AUS","LA", "PHL", "SEA"))
    
    
  )


df_crime_plot_violent = df_crime_plot %>% filter(crime_type == "Crime (Violent)")
meta_res_violent = 
  metagen(TE = df_crime_plot_violent$est,
          seTE = df_crime_plot_violent$se, 
          sm = "SMD")
df_crime_plot_property = df_crime_plot %>% filter(crime_type == "Crime (Property)")
meta_res_property = 
  metagen(TE = df_crime_plot_property$est,
          seTE = df_crime_plot_property$se, 
          sm = "SMD")
df_crime_plot_society = df_crime_plot %>% filter(crime_type == "Crime (Society)")
meta_res_society = 
  metagen(TE = df_crime_plot_society$est,
          seTE = df_crime_plot_society$se, 
          sm = "SMD")

df_crime_plot = bind_rows(
  df_crime_plot,
  data.frame(
    est = meta_res_society$TE.random,
    se = meta_res_society$seTE.random,
    pv = meta_res_society$pval.random,
    crime_type = "Crime (Society)",
    city =  "Meta-Analysis"
  ),
  data.frame(
    est = meta_res_property$TE.random,
    se = meta_res_property$seTE.random,
    pv = meta_res_property$pval.random,
    crime_type = "Crime (Property)",
    city =  "Meta-Analysis"
  ),
  data.frame(
    est = meta_res_violent$TE.random,
    se = meta_res_violent$seTE.random,
    pv = meta_res_violent$pval.random,
    crime_type = "Crime (Violent)",
    city =  "Meta-Analysis"
  )
)

df_crime_plot$city = 
  factor(df_crime_plot$city,
         levels = c("AUS",  "LA", "PHL", "SEA", "Meta-Analysis"))

df_crime_plot %>% 
  ggplot() + 
  geom_point(aes(x = city, y = est,
                 shape = crime_type),
             position = position_dodge(.3),
             size = 2) + 
  geom_errorbar(aes(x = city, 
                    ymin = est - 1.96 * se,
                    ymax = est + 1.96 * se, 
                    shape = crime_type),
                position = position_dodge(.3),
                width = 0,
                size = .2) + 
  geom_errorbar(aes(x = city, 
                    ymin = est - 1.645 * se,
                    ymax = est + 1.645 * se,
                    shape = crime_type),
                position = position_dodge(.3),
                width = 0,
                size = .4) + 
  geom_hline(yintercept = 0, size = .4) + 
  labs(x = "City", y = "Standardized Coefficient\n(BLM Protest)",
       shape = "Outcome") + 
  theme_tufte()

ggsave(plot = last_plot(), width = 6, height = 2.25,
       filename = "pics/figure5crimewc.jpeg", dpi = 600)



